---
title: "Vintage"
author: "Yanran"
date: "09/19/2022"
output: html_document
---

```{r library}
library(tidyverse)
library(tidycensus)
library(tigris)
library(sp)
library(stringr)
library(haven)
library(reshape2)
library(rstudioapi)
library(dplyr)
library(RColorBrewer)
library(purrr)
```



```{r}
## Read the tract-level file and keep the rows whose state column equals "25" ##
CT0825 <- read.csv(file = "nhgis_ppdd_20220825_p_tract.csv")
CT.ma <- filter(CT0825, state == "25")

## Lookup from race label to the three-character table prefix used in the
## column names of the file ##
racemg <- data.frame(
  race = c(
    "total", "white", "black", "am_indian",
    "asian", "pac_islander", "hispanic"
  ),
  racevar = c("H76", "H9I", "H9B", "H9C", "H9D", "H9E", "H9H")
)

## Zero-padded, three-digit cell numbers that are kept: 003-019 and 027-043 ##
nums65 <- sprintf("%03d", c(3:19, 27:43))

## Full column names: every prefix crossed with every cell number, plus "_dp".
## The prefix varies slowest, so the names are grouped by race table. ##
vnames <- paste0(rep(racemg$racevar, each = length(nums65)), nums65, "_dp")

```



```{r}
## create fips code ##
## Built from gisjoin by concatenating characters 2-3, 5-7 and 9-onward
## (characters 1, 4 and 8 are skipped); presumably these are the state, county
## and tract parts -- confirm against the NHGIS codebook. Stored as numeric. ##
CT.ma$GEOID <- as.numeric(paste0(
  substr(CT.ma$gisjoin, 2, 3),
  substr(CT.ma$gisjoin, 5, 7),
  substr(CT.ma$gisjoin, 9, nchar(CT.ma$gisjoin))
))

## subset to only variables we will use for denominator ##
CT.ma <- CT.ma[, c("GEOID", vnames)]

## put in long form and organize ##
CT.ma <- melt(
  data = CT.ma, id.vars = "GEOID", measure.vars = vnames,
  factorsAsStrings = TRUE
)
CT.ma$variable <- as.character(CT.ma$variable)

## racevar is a variable that tells us what racegroup the row represents ##
CT.ma$racevar <- substr(CT.ma$variable, 1, 3)
## vnum is a variable that tells us the sex and age group the row represents ##
## (characters 5-6 of the column name, i.e. the last two digits of the cell) ##
CT.ma$vnum <- as.numeric(substr(CT.ma$variable, 5, 6))

## create a lookup (called rxsxa) from vnum to sex and to an age group variable
## with consistent groupings ##
## age ranges ##
agecat <- paste0("Age", c(
  "0-4", "5-9", "10-14", "15-17", "18-19", "20-24",
  "25-29", "30-34", "35-44", "45-54", "55-64"
))

## index into agecat for each of the 17 consecutive cells of one sex; repeated
## indices collapse several fine cells into one age group ##
realign_fineage <- c(1:5, 6, 6, 6, 7, 8, 9, 9, 10, 10, 11, 11, 11)

## cells 3-19 are labelled male and cells 27-43 female ##
male_cells <- 3:19
female_cells <- 27:43
stopifnot(
  length(male_cells) == length(realign_fineage),
  length(female_cells) == length(realign_fineage)
)

## Key the labels on vnum itself instead of relying on the row order of a
## sorted table, so a missing or extra cell cannot silently shift the labels ##
rxsxa <- data.frame(
  vnum = c(male_cells, female_cells),
  age = rep(realign_fineage, times = 2)
)
rxsxa$sex <- rep(c("m", "f"), each = length(realign_fineage))
rxsxa <- merge(
  rxsxa,
  data.frame(age = seq_along(agecat), agecat = agecat),
  by = "age"
)

## merge() drops unmatched rows silently, so fail loudly if any cell number or
## race prefix in the data has no label ##
stopifnot(
  all(CT.ma$vnum %in% rxsxa$vnum),
  all(CT.ma$racevar %in% racemg$racevar)
)

CT.ma <- merge(CT.ma, rxsxa, by = "vnum")
CT.ma <- merge(CT.ma, racemg, by = "racevar")

## need to now aggregate population size due to the inconsistent age
## groupings (several fine cells map to the same agecat) ##
CT.ma <- aggregate(value ~ GEOID + race + agecat + sex,
  data = CT.ma,
  FUN = sum, na.rm = TRUE
)
## rename by name rather than by column position ##
names(CT.ma)[names(CT.ma) == "value"] <- "dp0825_pop"
ma_dp0825 <- CT.ma
save(ma_dp0825, file = "dp0825_data.RData")

## NOTE(review): this wipes every object in the workspace, including
## ma_dp0825; anything needed afterwards has to be re-loaded from disk ##
rm(list = ls())

#######################################
## 4. merge ACS, census, and dp data ##
#######################################


#setwd("~/Desktop/Research/Rachel/census_diff_privacy-master/vintage-05-27")

## read each processed dataset ##
## The workspace was cleared above, so every table merged below must come from
## one of these files. ##
load('acs_data.RData')

load('ce_data.RData')

load('dp_data.RData')

load('dp0527_data.RData')

load('dp22_data.RData')

## ma_dp0825 is merged below but was removed by the earlier rm(list = ls());
## re-load it from the file written by save(ma_dp0825, ...) above ##
load('dp0825_data.RData')

## NOTE(review): dat.dp and CT.ma are presumably supplied by
## dp0527_data.RData / dp22_data.RData -- confirm which file holds which.
## Stop with a clear message if any input table is absent. ##
needed_tables <- c("ma_acs", "ma_ce", "ma_dp", "dat.dp", "CT.ma", "ma_dp0825")
absent_tables <- needed_tables[!vapply(needed_tables, exists, logical(1))]
if (length(absent_tables) > 0) {
  stop(
    "Missing input table(s): ", paste(absent_tables, collapse = ", "),
    call. = FALSE
  )
}

## merge them by fips code, race, age, and sex ##
## Inner joins applied left to right, in the same order as before, so only
## GEOID/race/agecat/sex combinations present in every table are kept. ##
merge_keys <- c('GEOID', 'race', 'agecat', 'sex')
adat <- Reduce(
  function(left, right) merge(left, right, by = merge_keys),
  list(ma_acs, ma_ce, ma_dp, dat.dp, CT.ma, ma_dp0825)
)

## Persist the merged table, clear the workspace, and reload so that only
## adat remains in memory. ##
save(adat, file = 'merged_denom_cov_data5.RData')
rm(list = ls())

load('merged_denom_cov_data5.RData')
```




